# When TRUE, the per-(location, date) regressions further down are fitted with
# parallel::mclapply(); when FALSE they are fitted serially with group_modify().
PARCOMPUTE = TRUE
# Number of workers handed to mclapply(mc.cores = N_CORE).
N_CORE = parallel::detectCores()
# detectCores() is documented to return NA when the core count is unknown, and
# mclapply() does not support mc.cores > 1 on Windows; fall back to a single
# worker in both cases so the parallel branch still runs.
if (is.na(N_CORE) || .Platform$OS.type == "windows") {
  N_CORE = 1L
}
In this notebook, we repeat the analysis of 02_temporal_heterogeneity.Rmd for all of our core indicators.
# Indicators to fetch from the API. The five vectors below are index-aligned:
# element i of each one describes the same (indicator, target) pairing, and
# every later loop indexes all of them with the same ind_idx.
# Note that the hospital-admissions signal appears twice, once per target.
# TODO: Add Google Symptoms "eventually"
source_names = c("doctor-visits", "fb-survey", "fb-survey",
                 "hospital-admissions", "hospital-admissions")
signal_names = c("smoothed_adj_cli", "smoothed_cli", "smoothed_hh_cmnty_cli",
                 "smoothed_adj_covid19_from_claims",
                 "smoothed_adj_covid19_from_claims")
# Human-readable indicator names used in plot titles.
pretty_names = c("Doctor visits", "Facebook CLI", "Facebook CLI-in-community",
                 "Hospitalizations", "Hospitalizations")
# Which target each indicator is compared against ('Cases' or 'Deaths').
target_names = c("Cases", "Cases", "Cases", "Cases", "Deaths")
# Fail fast if the index-aligned vectors drift out of sync when an indicator
# is added; otherwise later loops would silently index NA entries.
stopifnot(length(signal_names) == length(source_names),
          length(pretty_names) == length(source_names),
          length(target_names) == length(source_names))
# Geographic resolution, supplied through the notebook parameters.
geo_level = params$geo_value
start_day = "2020-04-15"
# NULL end day is passed through to covidcast_signal() unchanged.
end_day = NULL
# One cache file per geo level.
cache_fname = sprintf('cached_data/12_heterogeneity_core_indicators_%s.RDS',
                      geo_level)
# Load the indicator and target signals. They are downloaded with
# covidcast_signal() only when no cache file exists for this geo level;
# otherwise the cached list is read back and unpacked by position.
if (!file.exists(cache_fname)) {
  # One data frame per (source, signal) pair, in the order of signal_names.
  df_signals = lapply(seq_along(signal_names), function(i) {
    suppressWarnings(
      covidcast_signal(source_names[i], signal_names[i],
                       start_day, end_day,
                       geo_type=geo_level))
  })
  # Fetch USAFacts confirmed case incidence proportion (smoothed with 7-day
  # trailing average)
  df_cases = suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type=geo_level))
  # Same for the USAFacts death incidence proportion.
  df_deaths = suppressWarnings(
    covidcast_signal("usa-facts", "deaths_7dav_incidence_prop",
                     start_day, end_day,
                     geo_type=geo_level))
  # Make sure the cache directory exists so the download is not lost to a
  # failing saveRDS() on a fresh checkout.
  dir.create(dirname(cache_fname), recursive=TRUE, showWarnings=FALSE)
  # Stored positionally: [[1]] indicators, [[2]] cases, [[3]] deaths.
  saveRDS(list(df_signals, df_cases, df_deaths), cache_fname)
} else {
  cached_data = readRDS(cache_fname)
  df_signals = cached_data[[1]]
  df_cases = cached_data[[2]]
  df_deaths = cached_data[[3]]
}
# Choose the locations to analyse.
# Minimum cumulative confirmed case count (as of 2020-11-01) a county must
# have to be included.
case_num = 500
if (geo_level == 'county') {
  # Keep only counties whose cumulative count on 2020-11-01 reaches case_num.
  # NOTE(review): this call relies on covidcast_signal()'s default geo_type,
  # presumably county -- confirm.
  geo_values = suppressWarnings(
    covidcast_signal("usa-facts", "confirmed_cumulative_num",
                     '2020-11-01',
                     '2020-11-01')) %>%
    filter(value >= case_num) %>%
    pull(geo_value)
} else if (geo_level == 'state') {
  # At state level, use every location present in the first indicator.
  geo_values = unique(df_signals[[1]]$geo_value)
} else {
  # Without this branch geo_values would be left undefined (or stale from a
  # previous run) and the failure would surface far from its cause.
  stop(sprintf("Unsupported geo_level '%s'; expected 'county' or 'state'.",
               geo_level))
}
# Training windows for the time-series sensorization, as (lower, upper) day
# offsets relative to the date being sensorized. Every window ends 8 days
# before that date; the start moves back one week at a time, from 42 to 70
# days before it.
sensorize_time_ranges = lapply(
  c(-42, -49, -56, -63, -70),
  function(window_start) c(window_start, -8)
)
# For every indicator:
#   1. join it to its target (cases or deaths) on the selected locations;
#   2. build a "spatially" sensorized version: one linear fit per location
#      over the whole time range;
#   3. build "time-series" sensorized versions: one linear fit per location
#      and date, trained on a trailing window from sensorize_time_ranges;
#   4. compute the per-day Spearman correlation of each version with the
#      target and plot them together.
# Correlations and fitted values are cached under results/.
for (ind_idx in seq_along(source_names)) {
  # Pick the target this indicator is compared against.
  if (target_names[ind_idx] == 'Cases') {
    df_target = df_cases
  } else if (target_names[ind_idx] == 'Deaths') {
    df_target = df_deaths
  } else {
    stop(sprintf("No matching dataframe for target %s.", target_names[ind_idx]))
  }

  # Indicator restricted to the selected locations, paired with the target
  # value for the same location and day.
  ind_df = tibble(df_signals[[ind_idx]]) %>% filter(geo_value %in% geo_values)
  ind_target = inner_join(ind_df, tibble(df_target),
                          by=c('geo_value', 'time_value')) %>%
    select(geo_value=geo_value,
           time_value=time_value,
           indicator_value=value.x,
           target_value=value.y)

  # Spatial sensorization: regress target on indicator once per location,
  # using all available days, and keep the fitted values.
  ind_global_sensorized = ind_target %>%
    group_by(geo_value) %>%
    group_modify(~ {
      fit = lm(target_value ~ indicator_value, data=.x)
      tibble(time_value=.x$time_value,
             indicator_value=.x$indicator_value,
             target_value=.x$target_value,
             sensorized_value=fit$fitted.values)
    }) %>%
    ungroup

  # Re-shape the fitted values into the column layout of a covidcast_signal
  # data frame so that covidcast_cor() accepts them.
  df_global_sensorized = ind_global_sensorized %>%
    transmute(geo_value=geo_value,
              signal='ind_sensorized',
              time_value=time_value,
              direction=NA,
              issue=lubridate::ymd('2020-11-01'),
              lag=NA,
              value=sensorized_value,
              stderr=NA,
              sample_size=NA,
              data_source='linear_sensorization')
  # Label the object with the geo level actually analysed (this used to be
  # hard-coded to 'county', which mislabelled state-level runs).
  attributes(df_global_sensorized)$geo_type = geo_level
  attributes(df_global_sensorized)$metadata$geo_type = geo_level
  class(df_global_sensorized) = c("covidcast_signal", "data.frame")

  # --- Raw and spatially sensorized correlations (cached) ------------------
  base_cor_fname = sprintf('results/12_base_cors_%s_%s_%s_%s.RDS',
                           geo_level,
                           source_names[ind_idx], signal_names[ind_idx],
                           target_names[ind_idx])
  if (!file.exists(base_cor_fname)) {
    # NOTE(review): the raw correlation uses the unfiltered indicator
    # (df_signals[[ind_idx]]), whereas every sensorized version is restricted
    # to geo_values. Confirm that comparing the two is intended.
    df_cor_base_ind = covidcast_cor(df_signals[[ind_idx]], df_target,
                                    by='time_value', method='spearman')
    df_cor_sensorized_ind = covidcast_cor(df_global_sensorized, df_target,
                                          by='time_value', method='spearman')
    df_cor_base = rbind(df_cor_base_ind, df_cor_sensorized_ind)
    df_cor_base$Indicator = as.factor(c(rep('Raw', nrow(df_cor_base_ind)),
                                        rep('Sensorized (Spatial)',
                                            nrow(df_cor_sensorized_ind))))
    # Ensure the output directory exists before writing.
    dir.create(dirname(base_cor_fname), recursive=TRUE, showWarnings=FALSE)
    saveRDS(df_cor_base, base_cor_fname)
  } else {
    df_cor_base = readRDS(base_cor_fname)
  }

  # --- Time-series sensorized correlations and fitted values (cached) ------
  sensorize_fname = sprintf('results/12_sensorize_cors_%s_%s_%s_%s.RDS',
                            geo_level,
                            source_names[ind_idx], signal_names[ind_idx],
                            target_names[ind_idx])
  sensorize_val_fname = sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                geo_level,
                                source_names[ind_idx], signal_names[ind_idx],
                                target_names[ind_idx])
  # Both files are read back below, so recompute unless both are present.
  if (!file.exists(sensorize_fname) || !file.exists(sensorize_val_fname)) {
    sensorize_cors = vector('list', length(sensorize_time_ranges))
    ind_target_sensorized_list = vector('list', length(sensorize_time_ranges))
    for (outer_idx in seq_along(sensorize_time_ranges)) {
      sensorize_llim = sensorize_time_ranges[[outer_idx]][1]
      sensorize_ulim = sensorize_time_ranges[[outer_idx]][2]
      # sensorize_llim is negative, so subtracting it moves forward in time:
      # this is the first date whose training window starts at start_day.
      min_sensorize_date = lubridate::ymd(start_day) - sensorize_llim
      max_sensorize_date = max(ind_target$time_value)
      sensorize_date_offsets =
        0:as.integer(max_sensorize_date - min_sensorize_date)

      # Lookup table with one row per (sensorize_date, training day): joining
      # it to ind_target replicates each observation once for every date
      # whose training window contains it.
      joiner_df = bind_rows(lapply(sensorize_date_offsets, function(dt) {
        sensorize_date = min_sensorize_date + dt
        tibble(sensorize_date = sensorize_date,
               time_value = sensorize_date + sensorize_llim:sensorize_ulim)
      }))

      # dplyr joins take `by`, not `on`. The previous `on=` argument was
      # swallowed by `...`, so the join silently fell back to a natural join
      # on the only shared column, time_value; spell the key out instead.
      ind_windowed = ind_target %>%
        inner_join(joiner_df, by='time_value') %>%
        group_by(geo_value, sensorize_date)

      # One regression per (location, sensorize_date), as tidy coefficients.
      if (!PARCOMPUTE) {
        ind_sensorized_lm = ind_windowed %>%
          group_modify(
            ~ broom::tidy(lm(target_value ~ indicator_value, data = .x))
          ) %>%
          ungroup
      } else {
        ind_grouped_list = ind_windowed %>% group_split
        ind_sensorized_lm = parallel::mclapply(ind_grouped_list, function(df) {
          broom::tidy(
            lm(target_value ~ indicator_value, data = df)
          ) %>% mutate(
            geo_value = unique(df$geo_value),
            sensorize_date = unique(df$sensorize_date)
          )}, mc.cores = N_CORE) %>% bind_rows
      }

      # One row per (location, date) with `intercept` and `slope` columns.
      # The model has a single predictor, so every term other than the
      # intercept is the slope.
      ind_sensorized_wide = ind_sensorized_lm %>%
        select(geo_value, sensorize_date, term, estimate) %>%
        mutate(term = ifelse(term == '(Intercept)', 'intercept', 'slope')) %>%
        pivot_wider(id_cols = c(geo_value, sensorize_date),
                    names_from=term,
                    values_from=estimate)

      # Apply the coefficients fitted for each date to that date's indicator.
      ind_target_sensorized = ind_target %>%
        inner_join(ind_sensorized_wide,
                   by=c('time_value'='sensorize_date',
                        'geo_value')) %>%
        mutate(sensorized_value = intercept + indicator_value * slope)

      df_sensorized = ind_target_sensorized %>%
        transmute(geo_value=geo_value,
                  signal='ind_sensorized',
                  time_value=time_value,
                  direction=NA,
                  issue=lubridate::ymd('2020-11-01'),
                  lag=NA,
                  value=sensorized_value,
                  stderr=NA,
                  sample_size=NA,
                  data_source='linear_sensorization')
      # Same labelling as for the spatially sensorized signal above.
      attributes(df_sensorized)$geo_type = geo_level
      attributes(df_sensorized)$metadata$geo_type = geo_level
      class(df_sensorized) = c("covidcast_signal", "data.frame")

      df_cor_sensorized_ind = covidcast_cor(df_sensorized, df_target,
                                            by='time_value', method='spearman')
      df_cor_sensorized_ind$Indicator = sprintf('Sensorized (TS, %d:%d)',
                                                sensorize_llim,
                                                sensorize_ulim)
      sensorize_cors[[outer_idx]] = df_cor_sensorized_ind
      ind_target_sensorized_list[[outer_idx]] = ind_target_sensorized
    }
    dir.create(dirname(sensorize_fname), recursive=TRUE, showWarnings=FALSE)
    saveRDS(sensorize_cors, sensorize_fname)
    saveRDS(ind_target_sensorized_list, sensorize_val_fname)
  } else {
    sensorize_cors = readRDS(sensorize_fname)
    ind_target_sensorized_list = readRDS(sensorize_val_fname)
  }

  # --- Plot all correlation curves for this indicator ----------------------
  df_cor = bind_rows(df_cor_base, sensorize_cors)
  # Shorten the legend labels by dropping the common 'Sensorized ' prefix.
  df_cor$Indicator = stringr::str_replace(df_cor$Indicator,
                                          'Sensorized ',
                                          "")
  # Fix the legend order: raw, spatial, then the windows in configured order.
  df_cor$Indicator = factor(df_cor$Indicator,
                            levels=c('Raw',
                                     "(Spatial)",
                                     vapply(sensorize_time_ranges,
                                            function(x) {
                                              sprintf("(TS, %d:%d)",
                                                      x[[1]], x[[2]])
                                            },
                                            character(1))))
  plt = ggplot(df_cor, aes(x = time_value, y = value)) +
    geom_line(aes(color = Indicator)) +
    labs(title = sprintf("Correlation between %s and %s",
                         pretty_names[ind_idx],
                         target_names[ind_idx]),
         subtitle = "Per day",
         x = "Date", y = "Correlation") +
    theme(legend.position = "bottom")
  print(plt)
}
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 281 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Joining, by = "time_value"
## Warning: Removed 287 row(s) containing missing values (geom_path).
# Lower and upper quantiles of the fitted slopes that bound the visible y-range.
QUANTS = c(0.01, 0.99)
# TODO: Add more "core indicators"
# For every indicator and training window, print a summary of the fitted
# slopes and plot them over time: one point per row of the cached fitted
# values, with the per-day median and median +/- MAD drawn as lines.
for (ind_idx in seq_along(source_names)) {
  # Fitted values cached by the sensorization loop: a list with one data
  # frame per entry of sensorize_time_ranges.
  sensorize_val_fname = sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                geo_level,
                                source_names[ind_idx], signal_names[ind_idx],
                                target_names[ind_idx])
  sensorized_vals = readRDS(sensorize_val_fname)
  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv = sensorized_vals[[inner_idx]]
    print(summary(sv$slope))
    print(slope_limits <- quantile(sv$slope, QUANTS, na.rm=TRUE))
    plt = ggplot(sv, aes(x=time_value, y=slope)) +
      geom_point(alpha=0.1, size=0.5) +
      geom_hline(yintercept=0, colour='white') +
      stat_summary(aes(group=1, colour='median'),
                   fun=median,
                   geom="line") +
      stat_summary(aes(group=1, colour='+/- mad'),
                   fun=function(x) { median(x) + mad(x) },
                   geom="line") +
      stat_summary(aes(group=1, colour='+/- mad'),
                   fun=function(x) { median(x) - mad(x) },
                   geom="line") +
      scale_colour_manual(values=c("median"="maroon",
                                   "+/- mad"="darkgreen")) +
      labs(colour='') +
      ggtitle(
        sprintf("Slope distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      # Zoom with coord_cartesian() rather than ylim(): ylim() sets scale
      # limits, which discards out-of-range rows *before* stat_summary runs,
      # so the median/MAD lines would be computed on truncated data.
      coord_cartesian(ylim=c(slope_limits[[1]], slope_limits[[2]]))
    print(plt)
  }
}
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -6.99505 -0.07659 0.85530 1.79235 2.78705 22.67845
## 1% 99%
## -3.527572 14.757571
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.9632 -0.0022 0.9907 1.9269 2.9005 22.6117
## 1% 99%
## -2.848045 14.762133
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.14923 0.08725 1.13688 2.04799 3.00309 20.26235
## 1% 99%
## -2.483166 15.210452
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.8580 0.1961 1.2541 2.1569 3.1310 20.8083
## 1% 99%
## -2.031058 15.452289
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.6155 0.2806 1.3897 2.2576 3.2259 20.9600
## 1% 99%
## -1.973515 15.316266
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -47.4643 -0.6898 5.2872 11.8956 20.9973 148.6280
## 1% 99%
## -25.83519 78.26310
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.52026 -0.08779 7.10494 13.85845 24.39977 150.64244
## 1% 99%
## -21.95926 81.65420
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -37.7967 0.3934 9.1225 15.6121 27.1180 139.5317
## 1% 99%
## -18.91136 83.84794
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -27.2519 0.7216 10.6485 17.1112 28.8532 132.2374
## 1% 99%
## -15.47654 84.35775
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -29.285 1.306 12.542 18.500 30.662 125.651
## 1% 99%
## -14.46024 84.06255
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.8710 0.2182 0.9772 1.1369 1.8059 9.1133
## 1% 99%
## -1.367857 4.983324
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.5023 0.3095 1.0449 1.1708 1.8254 8.5906
## 1% 99%
## -1.223981 4.621978
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.1501 0.4219 1.0847 1.1977 1.8257 7.8688
## 1% 99%
## -0.9932766 4.3220581
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.2332 0.5277 1.1455 1.2276 1.8285 6.7969
## 1% 99%
## -0.6274733 4.0525981
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9483 0.6222 1.1853 1.2632 1.8353 6.3926
## 1% 99%
## -0.3992849 3.9132253
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -39.80474 -0.00589 0.92305 1.64847 2.52858 49.86180 3
## 1% 99%
## -4.554879 15.847305
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -53.72601 0.08357 1.06095 1.76913 2.72513 41.11893 2
## 1% 99%
## -4.142148 15.016964
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -11.1694 0.1807 1.1951 1.8689 2.9219 36.4991 2
## 1% 99%
## -3.950131 14.224453
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.686 0.246 1.323 1.964 3.091 33.875
## 1% 99%
## -3.905274 14.143066
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -11.018 0.335 1.441 2.055 3.221 33.579
## 1% 99%
## -3.21255 13.60217
## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing non-finite values (stat_summary).
## Warning: Removed 148 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.803954 -0.003612 0.013962 0.021795 0.041723 1.095137 3
## 1% 99%
## -0.1736429 0.2959707
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing non-finite values (stat_summary).
## Warning: Removed 181 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.370046 -0.001546 0.016001 0.024139 0.042797 1.095137 2
## 1% 99%
## -0.1384968 0.3026368
## Warning: Removed 172 rows containing non-finite values (stat_summary).
## Warning: Removed 172 rows containing non-finite values (stat_summary).
## Warning: Removed 172 rows containing non-finite values (stat_summary).
## Warning: Removed 172 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -0.4333028 -0.0002924 0.0171806 0.0256139 0.0436638 0.8118412 2
## 1% 99%
## -0.1270568 0.2893027
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing non-finite values (stat_summary).
## Warning: Removed 166 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.438961 0.000788 0.019057 0.027058 0.044112 0.607658
## 1% 99%
## -0.1350308 0.3159032
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.393282 0.003056 0.020350 0.029035 0.044812 0.602471
## 1% 99%
## -0.1002845 0.2888718
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).
# Lower and upper quantiles of the fitted intercepts that bound the visible
# y-range.
QUANTS = c(0.01, 0.99)
# TODO: Add more "core indicators"
# For every indicator and training window, print a summary of the fitted
# intercepts and plot them over time: one point per row of the cached fitted
# values, with the per-day median and median +/- MAD drawn as lines.
for (ind_idx in seq_along(source_names)) {
  # Fitted values cached by the sensorization loop: a list with one data
  # frame per entry of sensorize_time_ranges.
  sensorize_val_fname = sprintf('results/12_sensorize_vals_%s_%s_%s_%s.RDS',
                                geo_level,
                                source_names[ind_idx], signal_names[ind_idx],
                                target_names[ind_idx])
  sensorized_vals = readRDS(sensorize_val_fname)
  for (inner_idx in seq_along(sensorize_time_ranges)) {
    sv = sensorized_vals[[inner_idx]]
    print(summary(sv$intercept))
    print(intercept_limits <- quantile(sv$intercept, QUANTS, na.rm=TRUE))
    plt = ggplot(sv, aes(x=time_value, y=intercept)) +
      geom_point(alpha=0.1, size=0.5) +
      geom_hline(yintercept=0, colour='white') +
      stat_summary(aes(group=1, colour='median'),
                   fun=median,
                   geom="line") +
      stat_summary(aes(group=1, colour='+/- mad'),
                   fun=function(x) { median(x) + mad(x) },
                   geom="line") +
      stat_summary(aes(group=1, colour='+/- mad'),
                   fun=function(x) { median(x) - mad(x) },
                   geom="line") +
      scale_colour_manual(values=c("median"="maroon",
                                   "+/- mad"="darkgreen")) +
      labs(colour='') +
      ggtitle(
        sprintf("Intercept distribution for %s[%s], fitted on t in %d:%d",
                pretty_names[ind_idx],
                target_names[ind_idx],
                sensorize_time_ranges[[inner_idx]][1],
                sensorize_time_ranges[[inner_idx]][2])
      ) +
      # Zoom with coord_cartesian() rather than ylim(): ylim() sets scale
      # limits, which discards out-of-range rows *before* stat_summary runs,
      # so the median/MAD lines would be computed on truncated data.
      coord_cartesian(ylim=c(intercept_limits[[1]], intercept_limits[[2]]))
    print(plt)
  }
}
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -85.22700 0.06234 3.65336 4.60559 9.02520 92.78432
## 1% 99%
## -38.51123 48.60214
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing non-finite values (stat_summary).
## Warning: Removed 188 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -87.8276 -0.1411 3.2904 4.0433 8.6604 89.6721
## 1% 99%
## -35.55893 40.42421
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -79.3462 -0.4356 2.9715 3.5629 8.3476 72.0629
## 1% 99%
## -30.36604 33.38218
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing non-finite values (stat_summary).
## Warning: Removed 174 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -77.589 -0.649 2.753 3.109 7.952 51.441
## 1% 99%
## -29.48563 28.49836
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -67.4429 -0.9262 2.4858 2.6871 7.4899 49.1192
## 1% 99%
## -29.18673 25.69813
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing non-finite values (stat_summary).
## Warning: Removed 160 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -60.3477 0.6356 5.0620 5.1291 10.5415 60.3333
## 1% 99%
## -31.39289 36.57134
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -61.948 -0.293 4.345 3.781 9.576 47.869
## 1% 99%
## -32.05857 32.16370
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -64.570 -1.543 3.757 2.660 8.681 50.328
## 1% 99%
## -32.85714 29.26475
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -56.555 -2.717 3.126 1.763 7.986 41.071
## 1% 99%
## -32.24887 26.76281
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -53.0221 -3.8175 2.5191 0.9694 7.4169 39.2552
## 1% 99%
## -31.92991 25.29219
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -269.928 -22.600 -7.985 -13.527 1.961 75.513
## 1% 99%
## -130.44641 34.23281
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing non-finite values (stat_summary).
## Warning: Removed 190 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -244.4186 -22.6611 -9.1787 -13.9614 0.8268 63.4465
## 1% 99%
## -115.11382 31.26952
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing non-finite values (stat_summary).
## Warning: Removed 182 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -227.53407 -22.54198 -9.79478 -14.17981 0.04779 65.54112
## 1% 99%
## -99.28330 29.07741
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing non-finite values (stat_summary).
## Warning: Removed 176 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -192.637 -22.553 -10.593 -14.460 -1.005 52.440
## 1% 99%
## -88.08429 24.74444
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing non-finite values (stat_summary).
## Warning: Removed 168 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -179.451 -22.544 -11.244 -14.913 -2.342 47.856
## 1% 99%
## -80.06907 18.63802
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing non-finite values (stat_summary).
## Warning: Removed 162 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -95.994 2.123 5.344 6.974 10.556 91.850
## 1% 99%
## -17.84004 39.63661
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -51.923 1.997 5.148 6.543 9.907 91.850
## 1% 99%
## -14.88075 33.94944
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.315 1.872 4.797 6.162 9.438 103.884
## 1% 99%
## -12.60972 33.16308
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -37.107 1.758 4.517 5.836 8.995 98.408
## 1% 99%
## -11.74648 31.21223
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -39.296 1.599 4.294 5.524 8.619 98.408
## 1% 99%
## -10.47504 28.78262
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.70649 0.03892 0.11684 0.17133 0.22964 2.14846
## 1% 99%
## -0.360243 1.359333
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing non-finite values (stat_summary).
## Warning: Removed 178 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.61498 0.03942 0.11333 0.16214 0.22129 1.71702
## 1% 99%
## -0.2694737 1.1420245
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing non-finite values (stat_summary).
## Warning: Removed 170 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.65464 0.03788 0.11213 0.15431 0.21321 2.60893
## 1% 99%
## -0.2440557 0.9950797
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing non-finite values (stat_summary).
## Warning: Removed 164 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.63622 0.03826 0.11140 0.14786 0.20371 2.17876
## 1% 99%
## -0.2622776 0.8975714
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing non-finite values (stat_summary).
## Warning: Removed 156 rows containing missing values (geom_point).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.50874 0.03885 0.10910 0.14033 0.19236 2.17876
## 1% 99%
## -0.2218097 0.7774261
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing non-finite values (stat_summary).
## Warning: Removed 150 rows containing missing values (geom_point).